﻿namespace JoinBox.Basal;

using System;
using System.IO;
using System.Text;

// https://www.cnblogs.com/guyun/p/4262587.html
#if false  // 用法实例
#region 打开按钮
/// <summary>
/// 打开按钮
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void txtMenuOpen_Click(object sender, EventArgs e)
{
    string fName;
    OpenFileDialog openFileDialog = new OpenFileDialog();
    openFileDialog.InitialDirectory = "";// 注意这里写路径时要用 c:\\ 而不是 c:\
    openFileDialog.Filter = "文本文档|*.txt";
    openFileDialog.RestoreDirectory = true;
    openFileDialog.FilterIndex = 1;
    if (openFileDialog.ShowDialog() == DialogResult.OK)
    {
        fName       = openFileDialog.FileName;
        txtBox.Text = System.IO.File.ReadAllText(fName,FileEncoding.EncodingType.GetType(fName));
    }
}
#endregion
#endif


/// <summary>
/// File reading helpers that pick a text encoding automatically.
/// </summary>
public static partial class FileHelper
{
    /// <summary>
    /// Reads every line of a text file.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="encoder">
    /// Encoding to decode the file with. When <c>null</c>, the encoding reported by
    /// <c>EncodingTool.GetType(path)</c> is used, and if that is also <c>null</c>,
    /// <see cref="Encoding.Default"/> is used.
    /// </param>
    /// <returns>The lines of the file as returned by <see cref="File.ReadAllLines(string, Encoding)"/>.</returns>
    public static string[] ReadAllLines(string path, Encoding? encoder = null)
    {
        // Fallback chain: explicit argument -> detected encoding -> platform default.
        var effective = encoder ?? EncodingTool.GetType(path) ?? Encoding.Default;
        return File.ReadAllLines(path, effective);
    }
}

/// <summary>
/// Detects the text encoding of a file from its bytes.
/// </summary>
public class EncodingTool
{
    /// <summary>
    /// Opens the file at the given path, reads its bytes and determines its text encoding.
    /// </summary>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <returns>
    /// The detected encoding, or <c>null</c> when <paramref name="fileName"/> is null or empty
    /// or no file exists at that path.
    /// </returns>
    public static Encoding? GetType(string fileName)
    {
        if (string.IsNullOrEmpty(fileName) || !File.Exists(fileName))
            return null;

        // "using" guarantees the handle is released even if detection throws.
        using var file = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        return GetType(file);
    }

    /// <summary>
    /// Determines the text encoding of the data in the given file stream.
    /// </summary>
    /// <remarks>
    /// Detection order: UTF-8 BOM (EF BB BF), UTF-16 big-endian BOM (FE FF),
    /// UTF-16 little-endian BOM (FF FE), then a structural check for UTF-8 without BOM.
    /// Data that passes none of these is reported as <see cref="Encoding.Default"/>.
    /// Empty data and pure ASCII data pass the structural check and are reported as UTF-8.
    /// The stream is read from its current position and is closed before this method returns.
    /// </remarks>
    /// <param name="fs">File stream to inspect; it is closed by this call.</param>
    /// <returns>The detected encoding; never <c>null</c>.</returns>
    public static Encoding GetType(FileStream fs)
    {
        // Disposing the reader also closes fs, so the caller must not use the stream afterwards.
        using var reader = new BinaryReader(fs, Encoding.Default);

        // A single ReadBytes call takes an int count; for longer streams nothing is read
        // and the empty buffer falls through to the UTF-8 result below.
        int count = fs.Length <= int.MaxValue ? (int)fs.Length : 0;
        byte[] data = reader.ReadBytes(count);

        // A BOM is only the 2 or 3 marker bytes; whatever follows is file content
        // and must not take part in the comparison.
        if (StartsWith(data, 0xEF, 0xBB, 0xBF))
            return Encoding.UTF8;
        if (StartsWith(data, 0xFE, 0xFF))
            return Encoding.BigEndianUnicode;
        if (StartsWith(data, 0xFF, 0xFE))
            return Encoding.Unicode;

        return IsUTF8Bytes(data) ? Encoding.UTF8 : Encoding.Default;
    }

    /// <summary>
    /// Checks whether <paramref name="data"/> begins with the given bytes,
    /// without indexing past the end of short buffers.
    /// </summary>
    /// <param name="data">Buffer to test.</param>
    /// <param name="prefix">Bytes expected at the start of the buffer.</param>
    /// <returns><c>true</c> when the buffer is long enough and its leading bytes match.</returns>
    static bool StartsWith(byte[] data, params byte[] prefix)
    {
        if (data.Length < prefix.Length)
            return false;
        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
                return false;
        }
        return true;
    }

    /// <summary>
    /// Checks whether the bytes are structured like UTF-8 without a BOM.
    /// </summary>
    /// <param name="data">Bytes to examine.</param>
    /// <returns>
    /// <c>true</c> when every byte is either below 0x80 or part of a complete
    /// lead-byte/continuation-byte sequence; <c>false</c> otherwise, including when
    /// the data ends in the middle of a multi-byte sequence.
    /// </returns>
    static bool IsUTF8Bytes(byte[] data)
    {
        int pending = 0; // continuation bytes still expected for the current character
        foreach (byte current in data)
        {
            if (pending > 0)
            {
                // Inside a multi-byte character every byte must look like 10xxxxxx.
                if ((current & 0xC0) != 0x80)
                    return false;
                pending--;
                continue;
            }

            if (current < 0x80)
                continue; // single-byte (ASCII) character

            // The number of leading 1 bits in a lead byte is the total length of the sequence.
            int sequenceLength = 0;
            for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
                sequenceLength++;

            // One leading 1 is a stray continuation byte; more than six is not a valid lead byte.
            if (sequenceLength == 1 || sequenceLength > 6)
                return false;

            pending = sequenceLength - 1;
        }

        // Data that stops mid-sequence is simply not UTF-8; report that instead of throwing.
        return pending == 0;
    }
}

